#ifndef CUFFTDX_FFT_1331_FP32_FWD_PTX_HPP
#define CUFFTDX_FFT_1331_FP32_FWD_PTX_HPP



// Specialization <182, float, 1> of cufftdx_private_function, implemented as a
// single inline-PTX block. Going by the include guard this belongs to the
// 1331-point FP32 forward FFT; NOTE(review): confirm what template id 182 and
// the trailing 1 select -- that is not visible here.
//
// rmem: 11 complex<float> values per thread, read as inputs (rmem[0..10].x/.y)
//       and overwritten with the 11 results at the end of the asm block.
// smem: 32-bit base address used directly with st.shared/ld.shared. Each
//       thread offsets it by (tid.y + tid.x / 121) * 10648 bytes and then
//       addresses 8-byte elements inside that window.
//
// Structure of the asm body:
//   1. An 11-input butterfly on the register inputs (the f32 immediates look
//      like cos/sin of multiples of 2*pi/11, e.g. 0f3F575C64 ~= 0.8413).
//   2. Outputs 1..10 are multiplied by successive powers of one complex
//      factor loaded from lut_sp_11_1331[tid.x % 121]; the 11 values are then
//      exchanged through shared memory (stored 8 bytes apart, reloaded
//      968 bytes apart).
//   3. A second 11-input butterfly, scaled by powers of
//      lut_sp_11_121[(tid.x % 121) / 11], followed by a second exchange
//      (stored 88 bytes apart, reloaded 968 bytes apart).
//   4. A third 11-input butterfly whose results are written to rmem.
//
// The body executes four `barrier.sync 0` instructions, so presumably every
// thread of the block has to call this function together -- confirm against
// the caller.
// NOTE(review): the asm statement declares no "memory" clobber although it
// reads/writes shared memory and loads from global memory; check that callers
// do not depend on compiler-visible ordering of C++ memory accesses around it.
template<> __forceinline__ __device__ void cufftdx_private_function<182, float, 1>(cufftdx::detail::complex<float> *rmem, unsigned smem){

// tid.x / 121 and (tid.x % 121) / 11 are computed below with
// multiply-high/shift sequences (constants 248469183 and -1171354717).
asm volatile (R"({
.reg .f32 f<781>;
.reg .b32 r<21>;
.reg .b64 rd<12>;
mov.u32 r1, %tid.y;
mov.u32 r2, %22;
mad.lo.s32 r3, r1, 10648, r2;
add.f32 f45, %27, %51;
add.f32 f46, %29, %52;
sub.f32 f47, %27, %51;
sub.f32 f48, %29, %52;
add.f32 f49, %30, %49;
add.f32 f50, %32, %50;
sub.f32 f51, %30, %49;
sub.f32 f52, %32, %50;
add.f32 f53, %33, %46;
add.f32 f54, %34, %48;
sub.f32 f55, %33, %46;
sub.f32 f56, %34, %48;
add.f32 f57, %35, %43;
add.f32 f58, %37, %45;
sub.f32 f59, %35, %43;
sub.f32 f60, %37, %45;
add.f32 f61, %38, %41;
add.f32 f62, %40, %42;
sub.f32 f63, %38, %41;
sub.f32 f64, %40, %42;
mov.u32 r4, %tid.x;
add.f32 f65, %25, f45;
add.f32 f66, %26, f46;
add.f32 f67, f65, f49;
add.f32 f68, f66, f50;
add.f32 f69, f67, f53;
add.f32 f70, f68, f54;
add.f32 f71, f69, f57;
add.f32 f72, f70, f58;
fma.rn.f32 f73, f45, 0f3F575C64, %25;
fma.rn.f32 f74, f48, 0fBF0A6770, 0f00000000;
fma.rn.f32 f75, f46, 0f3F575C64, %26;
fma.rn.f32 f76, f47, 0fBF0A6770, 0f00000000;
fma.rn.f32 f77, f49, 0f3ED4B147, f73;
fma.rn.f32 f78, f52, 0fBF68DDA4, f74;
fma.rn.f32 f79, f50, 0f3ED4B147, f75;
fma.rn.f32 f80, f51, 0fBF68DDA4, f76;
fma.rn.f32 f81, f53, 0fBE11BAFB, f77;
fma.rn.f32 f82, f56, 0fBF7D64F0, f78;
fma.rn.f32 f83, f54, 0fBE11BAFB, f79;
fma.rn.f32 f84, f55, 0fBF7D64F0, f80;
fma.rn.f32 f85, f57, 0fBF27A4F4, f81;
fma.rn.f32 f86, f60, 0fBF4178CE, f82;
fma.rn.f32 f87, f58, 0fBF27A4F4, f83;
fma.rn.f32 f88, f59, 0fBF4178CE, f84;
fma.rn.f32 f89, f61, 0fBF75A155, f85;
fma.rn.f32 f90, f64, 0fBE903F40, f86;
fma.rn.f32 f91, f62, 0fBF75A155, f87;
fma.rn.f32 f92, f63, 0fBE903F40, f88;
sub.f32 f93, f89, f90;
add.f32 f94, f92, f91;
add.f32 f95, f90, f89;
sub.f32 f96, f91, f92;
fma.rn.f32 f97, f45, 0f3ED4B147, %25;
fma.rn.f32 f98, f48, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f99, f46, 0f3ED4B147, %26;
fma.rn.f32 f100, f47, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f101, f49, 0fBF27A4F4, f97;
fma.rn.f32 f102, f52, 0fBF4178CE, f98;
fma.rn.f32 f103, f50, 0fBF27A4F4, f99;
fma.rn.f32 f104, f51, 0fBF4178CE, f100;
fma.rn.f32 f105, f53, 0fBF75A155, f101;
fma.rn.f32 f106, f56, 0f3E903F40, f102;
fma.rn.f32 f107, f54, 0fBF75A155, f103;
fma.rn.f32 f108, f55, 0f3E903F40, f104;
fma.rn.f32 f109, f57, 0fBE11BAFB, f105;
fma.rn.f32 f110, f60, 0f3F7D64F0, f106;
fma.rn.f32 f111, f58, 0fBE11BAFB, f107;
fma.rn.f32 f112, f59, 0f3F7D64F0, f108;
fma.rn.f32 f113, f61, 0f3F575C64, f109;
fma.rn.f32 f114, f64, 0f3F0A6770, f110;
fma.rn.f32 f115, f62, 0f3F575C64, f111;
fma.rn.f32 f116, f63, 0f3F0A6770, f112;
sub.f32 f117, f113, f114;
add.f32 f118, f116, f115;
add.f32 f119, f114, f113;
sub.f32 f120, f115, f116;
fma.rn.f32 f121, f45, 0fBE11BAFB, %25;
fma.rn.f32 f122, f48, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f123, f46, 0fBE11BAFB, %26;
fma.rn.f32 f124, f47, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f125, f49, 0fBF75A155, f121;
fma.rn.f32 f126, f52, 0f3E903F40, f122;
fma.rn.f32 f127, f50, 0fBF75A155, f123;
fma.rn.f32 f128, f51, 0f3E903F40, f124;
fma.rn.f32 f129, f53, 0f3ED4B147, f125;
fma.rn.f32 f130, f56, 0f3F68DDA4, f126;
fma.rn.f32 f131, f54, 0f3ED4B147, f127;
fma.rn.f32 f132, f55, 0f3F68DDA4, f128;
fma.rn.f32 f133, f57, 0f3F575C64, f129;
fma.rn.f32 f134, f60, 0fBF0A6770, f130;
fma.rn.f32 f135, f58, 0f3F575C64, f131;
fma.rn.f32 f136, f59, 0fBF0A6770, f132;
fma.rn.f32 f137, f61, 0fBF27A4F4, f133;
fma.rn.f32 f138, f64, 0fBF4178CE, f134;
fma.rn.f32 f139, f62, 0fBF27A4F4, f135;
fma.rn.f32 f140, f63, 0fBF4178CE, f136;
sub.f32 f141, f137, f138;
add.f32 f142, f140, f139;
add.f32 f143, f138, f137;
sub.f32 f144, f139, f140;
fma.rn.f32 f145, f45, 0fBF27A4F4, %25;
fma.rn.f32 f146, f48, 0fBF4178CE, 0f00000000;
fma.rn.f32 f147, f46, 0fBF27A4F4, %26;
fma.rn.f32 f148, f47, 0fBF4178CE, 0f00000000;
fma.rn.f32 f149, f49, 0fBE11BAFB, f145;
fma.rn.f32 f150, f52, 0f3F7D64F0, f146;
fma.rn.f32 f151, f50, 0fBE11BAFB, f147;
fma.rn.f32 f152, f51, 0f3F7D64F0, f148;
fma.rn.f32 f153, f53, 0f3F575C64, f149;
fma.rn.f32 f154, f56, 0fBF0A6770, f150;
fma.rn.f32 f155, f54, 0f3F575C64, f151;
fma.rn.f32 f156, f55, 0fBF0A6770, f152;
fma.rn.f32 f157, f57, 0fBF75A155, f153;
fma.rn.f32 f158, f60, 0fBE903F40, f154;
fma.rn.f32 f159, f58, 0fBF75A155, f155;
fma.rn.f32 f160, f59, 0fBE903F40, f156;
fma.rn.f32 f161, f61, 0f3ED4B147, f157;
fma.rn.f32 f162, f64, 0f3F68DDA4, f158;
fma.rn.f32 f163, f62, 0f3ED4B147, f159;
fma.rn.f32 f164, f63, 0f3F68DDA4, f160;
sub.f32 f165, f161, f162;
add.f32 f166, f164, f163;
add.f32 f167, f162, f161;
sub.f32 f168, f163, f164;
fma.rn.f32 f169, f45, 0fBF75A155, %25;
fma.rn.f32 f170, f48, 0fBE903F40, 0f00000000;
fma.rn.f32 f171, f46, 0fBF75A155, %26;
fma.rn.f32 f172, f47, 0fBE903F40, 0f00000000;
fma.rn.f32 f173, f49, 0f3F575C64, f169;
fma.rn.f32 f174, f52, 0f3F0A6770, f170;
fma.rn.f32 f175, f50, 0f3F575C64, f171;
fma.rn.f32 f176, f51, 0f3F0A6770, f172;
fma.rn.f32 f177, f53, 0fBF27A4F4, f173;
fma.rn.f32 f178, f56, 0fBF4178CE, f174;
fma.rn.f32 f179, f54, 0fBF27A4F4, f175;
fma.rn.f32 f180, f55, 0fBF4178CE, f176;
fma.rn.f32 f181, f57, 0f3ED4B147, f177;
fma.rn.f32 f182, f60, 0f3F68DDA4, f178;
fma.rn.f32 f183, f58, 0f3ED4B147, f179;
fma.rn.f32 f184, f59, 0f3F68DDA4, f180;
fma.rn.f32 f185, f61, 0fBE11BAFB, f181;
fma.rn.f32 f186, f64, 0fBF7D64F0, f182;
fma.rn.f32 f187, f62, 0fBE11BAFB, f183;
fma.rn.f32 f188, f63, 0fBF7D64F0, f184;
sub.f32 f189, f185, f186;
add.f32 f190, f188, f187;
add.f32 f191, f186, f185;
sub.f32 f192, f187, f188;
mul.wide.u32 rd2, r4, 248469183;
shr.u64 rd3, rd2, 32;
cvt.u32.u64 r5, rd3;
sub.s32 r6, r4, r5;
shr.u32 r7, r6, 1;
add.s32 r8, r7, r5;
shr.u32 r9, r8, 6;
mul.lo.s32 r10, r9, 121;
sub.s32 r11, r4, r10;
mad.lo.s32 r12, r9, 10648, r3;
mul.wide.u32 rd4, r11, 8;
mov.u64 rd5, %23;
add.s64 rd6, rd5, rd4;
ld.global.v2.f32 {f193, f194}, [rd6];
mul.f32 f197, f193, f93;
mul.f32 f198, f194, f94;
mul.f32 f199, f193, f94;
mul.f32 f200, f193, f193;
mul.f32 f201, f194, f194;
sub.f32 f202, f200, f201;
mul.f32 f203, f194, f193;
fma.rn.f32 f204, f194, f193, f203;
mul.f32 f205, f202, f117;
mul.f32 f206, f204, f118;
mul.f32 f207, f202, f118;
mul.f32 f208, f193, f202;
mul.f32 f209, f194, f204;
sub.f32 f210, f208, f209;
mul.f32 f211, f193, f204;
fma.rn.f32 f212, f194, f202, f211;
mul.f32 f213, f210, f141;
mul.f32 f214, f212, f142;
mul.f32 f215, f210, f142;
mul.f32 f216, f193, f210;
mul.f32 f217, f194, f212;
sub.f32 f218, f216, f217;
mul.f32 f219, f193, f212;
fma.rn.f32 f220, f194, f210, f219;
mul.f32 f221, f218, f165;
mul.f32 f222, f220, f166;
mul.f32 f223, f218, f166;
mul.f32 f224, f193, f218;
mul.f32 f225, f194, f220;
sub.f32 f226, f224, f225;
mul.f32 f227, f193, f220;
fma.rn.f32 f228, f194, f218, f227;
mul.f32 f229, f226, f189;
mul.f32 f230, f228, f190;
mul.f32 f231, f226, f190;
mul.f32 f232, f193, f226;
mul.f32 f233, f194, f228;
sub.f32 f234, f232, f233;
mul.f32 f235, f193, f228;
fma.rn.f32 f236, f194, f226, f235;
mul.f32 f237, f234, f191;
mul.f32 f238, f236, f192;
mul.f32 f239, f234, f192;
mul.f32 f240, f193, f234;
mul.f32 f241, f194, f236;
sub.f32 f242, f240, f241;
mul.f32 f243, f193, f236;
fma.rn.f32 f244, f194, f234, f243;
mul.f32 f245, f242, f167;
mul.f32 f246, f244, f168;
mul.f32 f247, f242, f168;
mul.f32 f248, f193, f242;
mul.f32 f249, f194, f244;
sub.f32 f250, f248, f249;
mul.f32 f251, f193, f244;
fma.rn.f32 f252, f194, f242, f251;
mul.f32 f253, f250, f143;
mul.f32 f254, f252, f144;
mul.f32 f255, f250, f144;
mul.f32 f256, f193, f250;
mul.f32 f257, f194, f252;
sub.f32 f258, f256, f257;
mul.f32 f259, f193, f252;
fma.rn.f32 f260, f194, f250, f259;
mul.f32 f261, f258, f119;
mul.f32 f262, f260, f120;
mul.f32 f263, f258, f120;
mul.f32 f264, f193, f258;
mul.f32 f265, f194, f260;
sub.f32 f266, f264, f265;
mul.f32 f267, f193, f260;
fma.rn.f32 f268, f194, f258, f267;
mul.f32 f269, f266, f95;
mul.f32 f270, f268, f96;
mul.f32 f271, f266, f96;
barrier.sync 0;
mad.lo.s32 r13, r11, 88, r12;
add.f32 f272, f72, f62;
add.f32 f273, f71, f61;
st.shared.v2.f32 [r13], {f273, f272};
fma.rn.f32 f274, f194, f93, f199;
sub.f32 f275, f197, f198;
st.shared.v2.f32 [r13+8], {f275, f274};
fma.rn.f32 f276, f204, f117, f207;
sub.f32 f277, f205, f206;
st.shared.v2.f32 [r13+16], {f277, f276};
sub.f32 f278, f213, f214;
fma.rn.f32 f279, f212, f141, f215;
st.shared.v2.f32 [r13+24], {f278, f279};
fma.rn.f32 f280, f220, f165, f223;
sub.f32 f281, f221, f222;
st.shared.v2.f32 [r13+32], {f281, f280};
fma.rn.f32 f282, f228, f189, f231;
sub.f32 f283, f229, f230;
st.shared.v2.f32 [r13+40], {f283, f282};
fma.rn.f32 f284, f236, f191, f239;
sub.f32 f285, f237, f238;
st.shared.v2.f32 [r13+48], {f285, f284};
fma.rn.f32 f286, f244, f167, f247;
sub.f32 f287, f245, f246;
st.shared.v2.f32 [r13+56], {f287, f286};
fma.rn.f32 f288, f252, f143, f255;
sub.f32 f289, f253, f254;
st.shared.v2.f32 [r13+64], {f289, f288};
fma.rn.f32 f290, f260, f119, f263;
sub.f32 f291, f261, f262;
st.shared.v2.f32 [r13+72], {f291, f290};
fma.rn.f32 f292, f268, f95, f271;
sub.f32 f293, f269, f270;
st.shared.v2.f32 [r13+80], {f293, f292};
barrier.sync 0;
mad.lo.s32 r14, r11, -80, r13;
ld.shared.v2.f32 {f294, f295}, [r14];
ld.shared.v2.f32 {f298, f299}, [r14+968];
ld.shared.v2.f32 {f302, f303}, [r14+1936];
ld.shared.v2.f32 {f306, f307}, [r14+2904];
ld.shared.v2.f32 {f310, f311}, [r14+3872];
ld.shared.v2.f32 {f314, f315}, [r14+4840];
ld.shared.v2.f32 {f318, f319}, [r14+5808];
ld.shared.v2.f32 {f322, f323}, [r14+6776];
ld.shared.v2.f32 {f326, f327}, [r14+7744];
ld.shared.v2.f32 {f330, f331}, [r14+8712];
ld.shared.v2.f32 {f334, f335}, [r14+9680];
add.f32 f338, f298, f334;
add.f32 f339, f299, f335;
sub.f32 f340, f298, f334;
sub.f32 f341, f299, f335;
add.f32 f342, f302, f330;
add.f32 f343, f303, f331;
sub.f32 f344, f302, f330;
sub.f32 f345, f303, f331;
add.f32 f346, f306, f326;
add.f32 f347, f307, f327;
sub.f32 f348, f306, f326;
sub.f32 f349, f307, f327;
add.f32 f350, f310, f322;
add.f32 f351, f311, f323;
sub.f32 f352, f310, f322;
sub.f32 f353, f311, f323;
add.f32 f354, f314, f318;
add.f32 f355, f315, f319;
sub.f32 f356, f314, f318;
sub.f32 f357, f315, f319;
add.f32 f358, f294, f338;
add.f32 f359, f295, f339;
add.f32 f360, f358, f342;
add.f32 f361, f359, f343;
add.f32 f362, f360, f346;
add.f32 f363, f361, f347;
add.f32 f364, f362, f350;
add.f32 f365, f363, f351;
fma.rn.f32 f366, f338, 0f3F575C64, f294;
fma.rn.f32 f367, f341, 0fBF0A6770, 0f00000000;
fma.rn.f32 f368, f339, 0f3F575C64, f295;
fma.rn.f32 f369, f340, 0fBF0A6770, 0f00000000;
fma.rn.f32 f370, f342, 0f3ED4B147, f366;
fma.rn.f32 f371, f345, 0fBF68DDA4, f367;
fma.rn.f32 f372, f343, 0f3ED4B147, f368;
fma.rn.f32 f373, f344, 0fBF68DDA4, f369;
fma.rn.f32 f374, f346, 0fBE11BAFB, f370;
fma.rn.f32 f375, f349, 0fBF7D64F0, f371;
fma.rn.f32 f376, f347, 0fBE11BAFB, f372;
fma.rn.f32 f377, f348, 0fBF7D64F0, f373;
fma.rn.f32 f378, f350, 0fBF27A4F4, f374;
fma.rn.f32 f379, f353, 0fBF4178CE, f375;
fma.rn.f32 f380, f351, 0fBF27A4F4, f376;
fma.rn.f32 f381, f352, 0fBF4178CE, f377;
fma.rn.f32 f382, f354, 0fBF75A155, f378;
fma.rn.f32 f383, f357, 0fBE903F40, f379;
fma.rn.f32 f384, f355, 0fBF75A155, f380;
fma.rn.f32 f385, f356, 0fBE903F40, f381;
sub.f32 f386, f382, f383;
add.f32 f387, f385, f384;
add.f32 f388, f383, f382;
sub.f32 f389, f384, f385;
fma.rn.f32 f390, f338, 0f3ED4B147, f294;
fma.rn.f32 f391, f341, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f392, f339, 0f3ED4B147, f295;
fma.rn.f32 f393, f340, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f394, f342, 0fBF27A4F4, f390;
fma.rn.f32 f395, f345, 0fBF4178CE, f391;
fma.rn.f32 f396, f343, 0fBF27A4F4, f392;
fma.rn.f32 f397, f344, 0fBF4178CE, f393;
fma.rn.f32 f398, f346, 0fBF75A155, f394;
fma.rn.f32 f399, f349, 0f3E903F40, f395;
fma.rn.f32 f400, f347, 0fBF75A155, f396;
fma.rn.f32 f401, f348, 0f3E903F40, f397;
fma.rn.f32 f402, f350, 0fBE11BAFB, f398;
fma.rn.f32 f403, f353, 0f3F7D64F0, f399;
fma.rn.f32 f404, f351, 0fBE11BAFB, f400;
fma.rn.f32 f405, f352, 0f3F7D64F0, f401;
fma.rn.f32 f406, f354, 0f3F575C64, f402;
fma.rn.f32 f407, f357, 0f3F0A6770, f403;
fma.rn.f32 f408, f355, 0f3F575C64, f404;
fma.rn.f32 f409, f356, 0f3F0A6770, f405;
sub.f32 f410, f406, f407;
add.f32 f411, f409, f408;
add.f32 f412, f407, f406;
sub.f32 f413, f408, f409;
fma.rn.f32 f414, f338, 0fBE11BAFB, f294;
fma.rn.f32 f415, f341, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f416, f339, 0fBE11BAFB, f295;
fma.rn.f32 f417, f340, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f418, f342, 0fBF75A155, f414;
fma.rn.f32 f419, f345, 0f3E903F40, f415;
fma.rn.f32 f420, f343, 0fBF75A155, f416;
fma.rn.f32 f421, f344, 0f3E903F40, f417;
fma.rn.f32 f422, f346, 0f3ED4B147, f418;
fma.rn.f32 f423, f349, 0f3F68DDA4, f419;
fma.rn.f32 f424, f347, 0f3ED4B147, f420;
fma.rn.f32 f425, f348, 0f3F68DDA4, f421;
fma.rn.f32 f426, f350, 0f3F575C64, f422;
fma.rn.f32 f427, f353, 0fBF0A6770, f423;
fma.rn.f32 f428, f351, 0f3F575C64, f424;
fma.rn.f32 f429, f352, 0fBF0A6770, f425;
fma.rn.f32 f430, f354, 0fBF27A4F4, f426;
fma.rn.f32 f431, f357, 0fBF4178CE, f427;
fma.rn.f32 f432, f355, 0fBF27A4F4, f428;
fma.rn.f32 f433, f356, 0fBF4178CE, f429;
sub.f32 f434, f430, f431;
add.f32 f435, f433, f432;
add.f32 f436, f431, f430;
sub.f32 f437, f432, f433;
fma.rn.f32 f438, f338, 0fBF27A4F4, f294;
fma.rn.f32 f439, f341, 0fBF4178CE, 0f00000000;
fma.rn.f32 f440, f339, 0fBF27A4F4, f295;
fma.rn.f32 f441, f340, 0fBF4178CE, 0f00000000;
fma.rn.f32 f442, f342, 0fBE11BAFB, f438;
fma.rn.f32 f443, f345, 0f3F7D64F0, f439;
fma.rn.f32 f444, f343, 0fBE11BAFB, f440;
fma.rn.f32 f445, f344, 0f3F7D64F0, f441;
fma.rn.f32 f446, f346, 0f3F575C64, f442;
fma.rn.f32 f447, f349, 0fBF0A6770, f443;
fma.rn.f32 f448, f347, 0f3F575C64, f444;
fma.rn.f32 f449, f348, 0fBF0A6770, f445;
fma.rn.f32 f450, f350, 0fBF75A155, f446;
fma.rn.f32 f451, f353, 0fBE903F40, f447;
fma.rn.f32 f452, f351, 0fBF75A155, f448;
fma.rn.f32 f453, f352, 0fBE903F40, f449;
fma.rn.f32 f454, f354, 0f3ED4B147, f450;
fma.rn.f32 f455, f357, 0f3F68DDA4, f451;
fma.rn.f32 f456, f355, 0f3ED4B147, f452;
fma.rn.f32 f457, f356, 0f3F68DDA4, f453;
sub.f32 f458, f454, f455;
add.f32 f459, f457, f456;
add.f32 f460, f455, f454;
sub.f32 f461, f456, f457;
fma.rn.f32 f462, f338, 0fBF75A155, f294;
fma.rn.f32 f463, f341, 0fBE903F40, 0f00000000;
fma.rn.f32 f464, f339, 0fBF75A155, f295;
fma.rn.f32 f465, f340, 0fBE903F40, 0f00000000;
fma.rn.f32 f466, f342, 0f3F575C64, f462;
fma.rn.f32 f467, f345, 0f3F0A6770, f463;
fma.rn.f32 f468, f343, 0f3F575C64, f464;
fma.rn.f32 f469, f344, 0f3F0A6770, f465;
fma.rn.f32 f470, f346, 0fBF27A4F4, f466;
fma.rn.f32 f471, f349, 0fBF4178CE, f467;
fma.rn.f32 f472, f347, 0fBF27A4F4, f468;
fma.rn.f32 f473, f348, 0fBF4178CE, f469;
fma.rn.f32 f474, f350, 0f3ED4B147, f470;
fma.rn.f32 f475, f353, 0f3F68DDA4, f471;
fma.rn.f32 f476, f351, 0f3ED4B147, f472;
fma.rn.f32 f477, f352, 0f3F68DDA4, f473;
fma.rn.f32 f478, f354, 0fBE11BAFB, f474;
fma.rn.f32 f479, f357, 0fBF7D64F0, f475;
fma.rn.f32 f480, f355, 0fBE11BAFB, f476;
fma.rn.f32 f481, f356, 0fBF7D64F0, f477;
sub.f32 f482, f478, f479;
add.f32 f483, f481, f480;
add.f32 f484, f479, f478;
sub.f32 f485, f480, f481;
mul.wide.u32 rd7, r11, -1171354717;
shr.u64 rd8, rd7, 35;
cvt.u32.u64 r15, rd8;
mul.lo.s32 r16, r15, 11;
sub.s32 r17, r11, r16;
mul.wide.u32 rd9, r15, 8;
mov.u64 rd10, %24;
add.s64 rd11, rd10, rd9;
ld.global.v2.f32 {f486, f487}, [rd11];
mul.f32 f490, f486, f386;
mul.f32 f491, f487, f387;
mul.f32 f492, f486, f387;
mul.f32 f493, f486, f486;
mul.f32 f494, f487, f487;
sub.f32 f495, f493, f494;
mul.f32 f496, f487, f486;
fma.rn.f32 f497, f487, f486, f496;
mul.f32 f498, f495, f410;
mul.f32 f499, f497, f411;
mul.f32 f500, f495, f411;
mul.f32 f501, f486, f495;
mul.f32 f502, f487, f497;
sub.f32 f503, f501, f502;
mul.f32 f504, f486, f497;
fma.rn.f32 f505, f487, f495, f504;
mul.f32 f506, f503, f434;
mul.f32 f507, f505, f435;
mul.f32 f508, f503, f435;
mul.f32 f509, f486, f503;
mul.f32 f510, f487, f505;
sub.f32 f511, f509, f510;
mul.f32 f512, f486, f505;
fma.rn.f32 f513, f487, f503, f512;
mul.f32 f514, f511, f458;
mul.f32 f515, f513, f459;
mul.f32 f516, f511, f459;
mul.f32 f517, f486, f511;
mul.f32 f518, f487, f513;
sub.f32 f519, f517, f518;
mul.f32 f520, f486, f513;
fma.rn.f32 f521, f487, f511, f520;
mul.f32 f522, f519, f482;
mul.f32 f523, f521, f483;
mul.f32 f524, f519, f483;
mul.f32 f525, f486, f519;
mul.f32 f526, f487, f521;
sub.f32 f527, f525, f526;
mul.f32 f528, f486, f521;
fma.rn.f32 f529, f487, f519, f528;
mul.f32 f530, f527, f484;
mul.f32 f531, f529, f485;
mul.f32 f532, f527, f485;
mul.f32 f533, f486, f527;
mul.f32 f534, f487, f529;
sub.f32 f535, f533, f534;
mul.f32 f536, f486, f529;
fma.rn.f32 f537, f487, f527, f536;
mul.f32 f538, f535, f460;
mul.f32 f539, f537, f461;
mul.f32 f540, f535, f461;
mul.f32 f541, f486, f535;
mul.f32 f542, f487, f537;
sub.f32 f543, f541, f542;
mul.f32 f544, f486, f537;
fma.rn.f32 f545, f487, f535, f544;
mul.f32 f546, f543, f436;
mul.f32 f547, f545, f437;
mul.f32 f548, f543, f437;
mul.f32 f549, f486, f543;
mul.f32 f550, f487, f545;
sub.f32 f551, f549, f550;
mul.f32 f552, f486, f545;
fma.rn.f32 f553, f487, f543, f552;
mul.f32 f554, f551, f412;
mul.f32 f555, f553, f413;
mul.f32 f556, f551, f413;
mul.f32 f557, f486, f551;
mul.f32 f558, f487, f553;
sub.f32 f559, f557, f558;
mul.f32 f560, f486, f553;
fma.rn.f32 f561, f487, f551, f560;
mul.f32 f562, f559, f388;
mul.f32 f563, f561, f389;
mul.f32 f564, f559, f389;
shl.b32 r18, r17, 3;
add.s32 r19, r12, r18;
barrier.sync 0;
mad.lo.s32 r20, r15, 968, r19;
add.f32 f565, f365, f355;
add.f32 f566, f364, f354;
st.shared.v2.f32 [r20], {f566, f565};
fma.rn.f32 f567, f487, f386, f492;
sub.f32 f568, f490, f491;
st.shared.v2.f32 [r20+88], {f568, f567};
fma.rn.f32 f569, f497, f410, f500;
sub.f32 f570, f498, f499;
st.shared.v2.f32 [r20+176], {f570, f569};
fma.rn.f32 f571, f505, f434, f508;
sub.f32 f572, f506, f507;
st.shared.v2.f32 [r20+264], {f572, f571};
fma.rn.f32 f573, f513, f458, f516;
sub.f32 f574, f514, f515;
st.shared.v2.f32 [r20+352], {f574, f573};
sub.f32 f575, f522, f523;
fma.rn.f32 f576, f521, f482, f524;
st.shared.v2.f32 [r20+440], {f575, f576};
fma.rn.f32 f577, f529, f484, f532;
sub.f32 f578, f530, f531;
st.shared.v2.f32 [r20+528], {f578, f577};
fma.rn.f32 f579, f537, f460, f540;
sub.f32 f580, f538, f539;
st.shared.v2.f32 [r20+616], {f580, f579};
fma.rn.f32 f581, f545, f436, f548;
sub.f32 f582, f546, f547;
st.shared.v2.f32 [r20+704], {f582, f581};
fma.rn.f32 f583, f553, f412, f556;
sub.f32 f584, f554, f555;
st.shared.v2.f32 [r20+792], {f584, f583};
fma.rn.f32 f585, f561, f388, f564;
sub.f32 f586, f562, f563;
st.shared.v2.f32 [r20+880], {f586, f585};
barrier.sync 0;
ld.shared.v2.f32 {f587, f588}, [r14];
ld.shared.v2.f32 {f591, f592}, [r14+968];
ld.shared.v2.f32 {f595, f596}, [r14+1936];
ld.shared.v2.f32 {f599, f600}, [r14+2904];
ld.shared.v2.f32 {f603, f604}, [r14+3872];
ld.shared.v2.f32 {f607, f608}, [r14+4840];
ld.shared.v2.f32 {f611, f612}, [r14+5808];
ld.shared.v2.f32 {f615, f616}, [r14+6776];
ld.shared.v2.f32 {f619, f620}, [r14+7744];
ld.shared.v2.f32 {f623, f624}, [r14+8712];
ld.shared.v2.f32 {f627, f628}, [r14+9680];
add.f32 f631, f591, f627;
add.f32 f632, f592, f628;
sub.f32 f633, f591, f627;
sub.f32 f634, f592, f628;
add.f32 f635, f595, f623;
add.f32 f636, f596, f624;
sub.f32 f637, f595, f623;
sub.f32 f638, f596, f624;
add.f32 f639, f599, f619;
add.f32 f640, f600, f620;
sub.f32 f641, f599, f619;
sub.f32 f642, f600, f620;
add.f32 f643, f603, f615;
add.f32 f644, f604, f616;
sub.f32 f645, f603, f615;
sub.f32 f646, f604, f616;
add.f32 f647, f607, f611;
add.f32 f648, f608, f612;
sub.f32 f649, f607, f611;
sub.f32 f650, f608, f612;
add.f32 f651, f587, f631;
add.f32 f652, f588, f632;
add.f32 f653, f651, f635;
add.f32 f654, f652, f636;
add.f32 f655, f653, f639;
add.f32 f656, f654, f640;
add.f32 f657, f655, f643;
add.f32 f658, f656, f644;
fma.rn.f32 f659, f631, 0f3F575C64, f587;
fma.rn.f32 f660, f634, 0fBF0A6770, 0f00000000;
fma.rn.f32 f661, f632, 0f3F575C64, f588;
fma.rn.f32 f662, f633, 0fBF0A6770, 0f00000000;
fma.rn.f32 f663, f635, 0f3ED4B147, f659;
fma.rn.f32 f664, f638, 0fBF68DDA4, f660;
fma.rn.f32 f665, f636, 0f3ED4B147, f661;
fma.rn.f32 f666, f637, 0fBF68DDA4, f662;
fma.rn.f32 f667, f639, 0fBE11BAFB, f663;
fma.rn.f32 f668, f642, 0fBF7D64F0, f664;
fma.rn.f32 f669, f640, 0fBE11BAFB, f665;
fma.rn.f32 f670, f641, 0fBF7D64F0, f666;
fma.rn.f32 f671, f643, 0fBF27A4F4, f667;
fma.rn.f32 f672, f646, 0fBF4178CE, f668;
fma.rn.f32 f673, f644, 0fBF27A4F4, f669;
fma.rn.f32 f674, f645, 0fBF4178CE, f670;
fma.rn.f32 f675, f647, 0fBF75A155, f671;
fma.rn.f32 f676, f650, 0fBE903F40, f672;
fma.rn.f32 f677, f648, 0fBF75A155, f673;
fma.rn.f32 f678, f649, 0fBE903F40, f674;
fma.rn.f32 f679, f631, 0f3ED4B147, f587;
fma.rn.f32 f680, f634, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f681, f632, 0f3ED4B147, f588;
fma.rn.f32 f682, f633, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f683, f635, 0fBF27A4F4, f679;
fma.rn.f32 f684, f638, 0fBF4178CE, f680;
fma.rn.f32 f685, f636, 0fBF27A4F4, f681;
fma.rn.f32 f686, f637, 0fBF4178CE, f682;
fma.rn.f32 f687, f639, 0fBF75A155, f683;
fma.rn.f32 f688, f642, 0f3E903F40, f684;
fma.rn.f32 f689, f640, 0fBF75A155, f685;
fma.rn.f32 f690, f641, 0f3E903F40, f686;
fma.rn.f32 f691, f643, 0fBE11BAFB, f687;
fma.rn.f32 f692, f646, 0f3F7D64F0, f688;
fma.rn.f32 f693, f644, 0fBE11BAFB, f689;
fma.rn.f32 f694, f645, 0f3F7D64F0, f690;
fma.rn.f32 f695, f647, 0f3F575C64, f691;
fma.rn.f32 f696, f650, 0f3F0A6770, f692;
fma.rn.f32 f697, f648, 0f3F575C64, f693;
fma.rn.f32 f698, f649, 0f3F0A6770, f694;
fma.rn.f32 f699, f631, 0fBE11BAFB, f587;
fma.rn.f32 f700, f634, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f701, f632, 0fBE11BAFB, f588;
fma.rn.f32 f702, f633, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f703, f635, 0fBF75A155, f699;
fma.rn.f32 f704, f638, 0f3E903F40, f700;
fma.rn.f32 f705, f636, 0fBF75A155, f701;
fma.rn.f32 f706, f637, 0f3E903F40, f702;
fma.rn.f32 f707, f639, 0f3ED4B147, f703;
fma.rn.f32 f708, f642, 0f3F68DDA4, f704;
fma.rn.f32 f709, f640, 0f3ED4B147, f705;
fma.rn.f32 f710, f641, 0f3F68DDA4, f706;
fma.rn.f32 f711, f643, 0f3F575C64, f707;
fma.rn.f32 f712, f646, 0fBF0A6770, f708;
fma.rn.f32 f713, f644, 0f3F575C64, f709;
fma.rn.f32 f714, f645, 0fBF0A6770, f710;
fma.rn.f32 f715, f647, 0fBF27A4F4, f711;
fma.rn.f32 f716, f650, 0fBF4178CE, f712;
fma.rn.f32 f717, f648, 0fBF27A4F4, f713;
fma.rn.f32 f718, f649, 0fBF4178CE, f714;
fma.rn.f32 f719, f631, 0fBF27A4F4, f587;
fma.rn.f32 f720, f634, 0fBF4178CE, 0f00000000;
fma.rn.f32 f721, f632, 0fBF27A4F4, f588;
fma.rn.f32 f722, f633, 0fBF4178CE, 0f00000000;
fma.rn.f32 f723, f635, 0fBE11BAFB, f719;
fma.rn.f32 f724, f638, 0f3F7D64F0, f720;
fma.rn.f32 f725, f636, 0fBE11BAFB, f721;
fma.rn.f32 f726, f637, 0f3F7D64F0, f722;
fma.rn.f32 f727, f639, 0f3F575C64, f723;
fma.rn.f32 f728, f642, 0fBF0A6770, f724;
fma.rn.f32 f729, f640, 0f3F575C64, f725;
fma.rn.f32 f730, f641, 0fBF0A6770, f726;
fma.rn.f32 f731, f643, 0fBF75A155, f727;
fma.rn.f32 f732, f646, 0fBE903F40, f728;
fma.rn.f32 f733, f644, 0fBF75A155, f729;
fma.rn.f32 f734, f645, 0fBE903F40, f730;
fma.rn.f32 f735, f647, 0f3ED4B147, f731;
fma.rn.f32 f736, f650, 0f3F68DDA4, f732;
fma.rn.f32 f737, f648, 0f3ED4B147, f733;
fma.rn.f32 f738, f649, 0f3F68DDA4, f734;
fma.rn.f32 f739, f631, 0fBF75A155, f587;
fma.rn.f32 f740, f634, 0fBE903F40, 0f00000000;
fma.rn.f32 f741, f632, 0fBF75A155, f588;
fma.rn.f32 f742, f633, 0fBE903F40, 0f00000000;
fma.rn.f32 f743, f635, 0f3F575C64, f739;
fma.rn.f32 f744, f638, 0f3F0A6770, f740;
fma.rn.f32 f745, f636, 0f3F575C64, f741;
fma.rn.f32 f746, f637, 0f3F0A6770, f742;
fma.rn.f32 f747, f639, 0fBF27A4F4, f743;
fma.rn.f32 f748, f642, 0fBF4178CE, f744;
fma.rn.f32 f749, f640, 0fBF27A4F4, f745;
fma.rn.f32 f750, f641, 0fBF4178CE, f746;
fma.rn.f32 f751, f643, 0f3ED4B147, f747;
fma.rn.f32 f752, f646, 0f3F68DDA4, f748;
fma.rn.f32 f753, f644, 0f3ED4B147, f749;
fma.rn.f32 f754, f645, 0f3F68DDA4, f750;
fma.rn.f32 f755, f647, 0fBE11BAFB, f751;
fma.rn.f32 f756, f650, 0fBF7D64F0, f752;
fma.rn.f32 f757, f648, 0fBE11BAFB, f753;
fma.rn.f32 f758, f649, 0fBF7D64F0, f754;
add.f32 %1, f658, f648;
add.f32 %0, f657, f647;
add.f32 %3, f678, f677;
sub.f32 %2, f675, f676;
add.f32 %5, f698, f697;
sub.f32 %4, f695, f696;
add.f32 %7, f718, f717;
sub.f32 %6, f715, f716;
add.f32 %9, f738, f737;
sub.f32 %8, f735, f736;
add.f32 %11, f758, f757;
sub.f32 %10, f755, f756;
sub.f32 %13, f757, f758;
add.f32 %12, f756, f755;
sub.f32 %15, f737, f738;
add.f32 %14, f736, f735;
sub.f32 %17, f717, f718;
add.f32 %16, f716, f715;
sub.f32 %19, f697, f698;
add.f32 %18, f696, f695;
sub.f32 %21, f677, f678;
add.f32 %20, f676, f675;
})"
     // Operands: %0-%21 are the 22 float outputs (rmem[0..10].x/.y), %22 is
     // smem, %23/%24 are the two lookup-table addresses, %25-%52 are the float
     // inputs. Several .y inputs are listed twice and the asm body reads only
     // the second copy (e.g. %29 rather than %28), so the operand order must
     // not be changed.
     : "=f"(rmem[0].x), "=f"(rmem[0].y), "=f"(rmem[1].x), "=f"(rmem[1].y), "=f"(rmem[2].x), "=f"(rmem[2].y), "=f"(rmem[3].x), "=f"(rmem[3].y), "=f"(rmem[4].x), "=f"(rmem[4].y), "=f"(rmem[5].x), "=f"(rmem[5].y), "=f"(rmem[6].x), "=f"(rmem[6].y), "=f"(rmem[7].x), "=f"(rmem[7].y), "=f"(rmem[8].x), "=f"(rmem[8].y), "=f"(rmem[9].x), "=f"(rmem[9].y), "=f"(rmem[10].x), "=f"(rmem[10].y): "r"(smem), "l"(lut_sp_11_1331), "l"(lut_sp_11_121), "f"(rmem[0].x), "f"(rmem[0].y), "f"(rmem[1].x), "f"(rmem[1].y), "f"(rmem[1].y), "f"(rmem[2].x), "f"(rmem[2].y), "f"(rmem[2].y), "f"(rmem[3].x), "f"(rmem[3].y), "f"(rmem[4].x), "f"(rmem[4].y), "f"(rmem[4].y), "f"(rmem[5].x), "f"(rmem[5].y), "f"(rmem[5].y), "f"(rmem[6].x), "f"(rmem[6].y), "f"(rmem[7].x), "f"(rmem[7].y), "f"(rmem[7].y), "f"(rmem[8].x), "f"(rmem[8].y), "f"(rmem[8].y), "f"(rmem[9].x), "f"(rmem[9].y), "f"(rmem[10].x), "f"(rmem[10].y));
};




template<> __forceinline__ __device__ void cufftdx_private_function<181, float, 1>(cufftdx::detail::complex<float> *rmem, unsigned smem){

asm volatile (R"({
.reg .f32 f<737>;
.reg .b32 r<21>;
.reg .b64 rd<12>;
mov.u32 r1, %tid.y;
mov.u32 r2, %22;
mad.lo.s32 r3, r1, 5324, r2;
add.f32 f45, %27, %51;
add.f32 f46, %29, %52;
sub.f32 f47, %27, %51;
sub.f32 f48, %29, %52;
add.f32 f49, %30, %49;
add.f32 f50, %32, %50;
sub.f32 f51, %30, %49;
sub.f32 f52, %32, %50;
add.f32 f53, %33, %46;
add.f32 f54, %34, %48;
sub.f32 f55, %33, %46;
sub.f32 f56, %34, %48;
add.f32 f57, %35, %43;
add.f32 f58, %37, %45;
sub.f32 f59, %35, %43;
sub.f32 f60, %37, %45;
add.f32 f61, %38, %41;
add.f32 f62, %40, %42;
sub.f32 f63, %38, %41;
sub.f32 f64, %40, %42;
mov.u32 r4, %tid.x;
add.f32 f65, %25, f45;
add.f32 f66, %26, f46;
add.f32 f67, f65, f49;
add.f32 f68, f66, f50;
add.f32 f69, f67, f53;
add.f32 f70, f68, f54;
add.f32 f71, f69, f57;
add.f32 f72, f70, f58;
add.f32 f73, f71, f61;
add.f32 f74, f72, f62;
fma.rn.f32 f75, f45, 0f3F575C64, %25;
fma.rn.f32 f76, f48, 0fBF0A6770, 0f00000000;
fma.rn.f32 f77, f46, 0f3F575C64, %26;
fma.rn.f32 f78, f47, 0fBF0A6770, 0f00000000;
fma.rn.f32 f79, f49, 0f3ED4B147, f75;
fma.rn.f32 f80, f52, 0fBF68DDA4, f76;
fma.rn.f32 f81, f50, 0f3ED4B147, f77;
fma.rn.f32 f82, f51, 0fBF68DDA4, f78;
fma.rn.f32 f83, f53, 0fBE11BAFB, f79;
fma.rn.f32 f84, f56, 0fBF7D64F0, f80;
fma.rn.f32 f85, f54, 0fBE11BAFB, f81;
fma.rn.f32 f86, f55, 0fBF7D64F0, f82;
fma.rn.f32 f87, f57, 0fBF27A4F4, f83;
fma.rn.f32 f88, f60, 0fBF4178CE, f84;
fma.rn.f32 f89, f58, 0fBF27A4F4, f85;
fma.rn.f32 f90, f59, 0fBF4178CE, f86;
fma.rn.f32 f91, f61, 0fBF75A155, f87;
fma.rn.f32 f92, f64, 0fBE903F40, f88;
fma.rn.f32 f93, f62, 0fBF75A155, f89;
fma.rn.f32 f94, f63, 0fBE903F40, f90;
sub.f32 f95, f91, f92;
add.f32 f96, f94, f93;
add.f32 f97, f92, f91;
sub.f32 f98, f93, f94;
fma.rn.f32 f99, f45, 0f3ED4B147, %25;
fma.rn.f32 f100, f48, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f101, f46, 0f3ED4B147, %26;
fma.rn.f32 f102, f47, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f103, f49, 0fBF27A4F4, f99;
fma.rn.f32 f104, f52, 0fBF4178CE, f100;
fma.rn.f32 f105, f50, 0fBF27A4F4, f101;
fma.rn.f32 f106, f51, 0fBF4178CE, f102;
fma.rn.f32 f107, f53, 0fBF75A155, f103;
fma.rn.f32 f108, f56, 0f3E903F40, f104;
fma.rn.f32 f109, f54, 0fBF75A155, f105;
fma.rn.f32 f110, f55, 0f3E903F40, f106;
fma.rn.f32 f111, f57, 0fBE11BAFB, f107;
fma.rn.f32 f112, f60, 0f3F7D64F0, f108;
fma.rn.f32 f113, f58, 0fBE11BAFB, f109;
fma.rn.f32 f114, f59, 0f3F7D64F0, f110;
fma.rn.f32 f115, f61, 0f3F575C64, f111;
fma.rn.f32 f116, f64, 0f3F0A6770, f112;
fma.rn.f32 f117, f62, 0f3F575C64, f113;
fma.rn.f32 f118, f63, 0f3F0A6770, f114;
sub.f32 f119, f115, f116;
add.f32 f120, f118, f117;
add.f32 f121, f116, f115;
sub.f32 f122, f117, f118;
fma.rn.f32 f123, f45, 0fBE11BAFB, %25;
fma.rn.f32 f124, f48, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f125, f46, 0fBE11BAFB, %26;
fma.rn.f32 f126, f47, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f127, f49, 0fBF75A155, f123;
fma.rn.f32 f128, f52, 0f3E903F40, f124;
fma.rn.f32 f129, f50, 0fBF75A155, f125;
fma.rn.f32 f130, f51, 0f3E903F40, f126;
fma.rn.f32 f131, f53, 0f3ED4B147, f127;
fma.rn.f32 f132, f56, 0f3F68DDA4, f128;
fma.rn.f32 f133, f54, 0f3ED4B147, f129;
fma.rn.f32 f134, f55, 0f3F68DDA4, f130;
fma.rn.f32 f135, f57, 0f3F575C64, f131;
fma.rn.f32 f136, f60, 0fBF0A6770, f132;
fma.rn.f32 f137, f58, 0f3F575C64, f133;
fma.rn.f32 f138, f59, 0fBF0A6770, f134;
fma.rn.f32 f139, f61, 0fBF27A4F4, f135;
fma.rn.f32 f140, f64, 0fBF4178CE, f136;
fma.rn.f32 f141, f62, 0fBF27A4F4, f137;
fma.rn.f32 f142, f63, 0fBF4178CE, f138;
sub.f32 f143, f139, f140;
add.f32 f144, f142, f141;
add.f32 f145, f140, f139;
sub.f32 f146, f141, f142;
fma.rn.f32 f147, f45, 0fBF27A4F4, %25;
fma.rn.f32 f148, f48, 0fBF4178CE, 0f00000000;
fma.rn.f32 f149, f46, 0fBF27A4F4, %26;
fma.rn.f32 f150, f47, 0fBF4178CE, 0f00000000;
fma.rn.f32 f151, f49, 0fBE11BAFB, f147;
fma.rn.f32 f152, f52, 0f3F7D64F0, f148;
fma.rn.f32 f153, f50, 0fBE11BAFB, f149;
fma.rn.f32 f154, f51, 0f3F7D64F0, f150;
fma.rn.f32 f155, f53, 0f3F575C64, f151;
fma.rn.f32 f156, f56, 0fBF0A6770, f152;
fma.rn.f32 f157, f54, 0f3F575C64, f153;
fma.rn.f32 f158, f55, 0fBF0A6770, f154;
fma.rn.f32 f159, f57, 0fBF75A155, f155;
fma.rn.f32 f160, f60, 0fBE903F40, f156;
fma.rn.f32 f161, f58, 0fBF75A155, f157;
fma.rn.f32 f162, f59, 0fBE903F40, f158;
fma.rn.f32 f163, f61, 0f3ED4B147, f159;
fma.rn.f32 f164, f64, 0f3F68DDA4, f160;
fma.rn.f32 f165, f62, 0f3ED4B147, f161;
fma.rn.f32 f166, f63, 0f3F68DDA4, f162;
sub.f32 f167, f163, f164;
add.f32 f168, f166, f165;
add.f32 f169, f164, f163;
sub.f32 f170, f165, f166;
fma.rn.f32 f171, f45, 0fBF75A155, %25;
fma.rn.f32 f172, f48, 0fBE903F40, 0f00000000;
fma.rn.f32 f173, f46, 0fBF75A155, %26;
fma.rn.f32 f174, f47, 0fBE903F40, 0f00000000;
fma.rn.f32 f175, f49, 0f3F575C64, f171;
fma.rn.f32 f176, f52, 0f3F0A6770, f172;
fma.rn.f32 f177, f50, 0f3F575C64, f173;
fma.rn.f32 f178, f51, 0f3F0A6770, f174;
fma.rn.f32 f179, f53, 0fBF27A4F4, f175;
fma.rn.f32 f180, f56, 0fBF4178CE, f176;
fma.rn.f32 f181, f54, 0fBF27A4F4, f177;
fma.rn.f32 f182, f55, 0fBF4178CE, f178;
fma.rn.f32 f183, f57, 0f3ED4B147, f179;
fma.rn.f32 f184, f60, 0f3F68DDA4, f180;
fma.rn.f32 f185, f58, 0f3ED4B147, f181;
fma.rn.f32 f186, f59, 0f3F68DDA4, f182;
fma.rn.f32 f187, f61, 0fBE11BAFB, f183;
fma.rn.f32 f188, f64, 0fBF7D64F0, f184;
fma.rn.f32 f189, f62, 0fBE11BAFB, f185;
fma.rn.f32 f190, f63, 0fBF7D64F0, f186;
sub.f32 f191, f187, f188;
add.f32 f192, f190, f189;
add.f32 f193, f188, f187;
sub.f32 f194, f189, f190;
mul.wide.u32 rd2, r4, 248469183;
shr.u64 rd3, rd2, 32;
cvt.u32.u64 r5, rd3;
sub.s32 r6, r4, r5;
shr.u32 r7, r6, 1;
add.s32 r8, r7, r5;
shr.u32 r9, r8, 6;
mul.lo.s32 r10, r9, 121;
sub.s32 r11, r4, r10;
mul.wide.u32 rd4, r11, 8;
mov.u64 rd5, %23;
add.s64 rd6, rd5, rd4;
ld.global.v2.f32 {f195, f196}, [rd6];
mul.f32 f199, f195, f95;
mul.f32 f200, f196, f96;
sub.f32 f201, f199, f200;
mul.f32 f202, f195, f96;
fma.rn.f32 f203, f196, f95, f202;
mul.f32 f204, f195, f195;
mul.f32 f205, f196, f196;
sub.f32 f206, f204, f205;
mul.f32 f207, f196, f195;
fma.rn.f32 f208, f196, f195, f207;
mul.f32 f209, f206, f119;
mul.f32 f210, f208, f120;
sub.f32 f211, f209, f210;
mul.f32 f212, f206, f120;
fma.rn.f32 f213, f208, f119, f212;
mul.f32 f214, f195, f206;
mul.f32 f215, f196, f208;
sub.f32 f216, f214, f215;
mul.f32 f217, f195, f208;
fma.rn.f32 f218, f196, f206, f217;
mul.f32 f219, f216, f143;
mul.f32 f220, f218, f144;
sub.f32 f221, f219, f220;
mul.f32 f222, f216, f144;
fma.rn.f32 f223, f218, f143, f222;
mul.f32 f224, f195, f216;
mul.f32 f225, f196, f218;
sub.f32 f226, f224, f225;
mul.f32 f227, f195, f218;
fma.rn.f32 f228, f196, f216, f227;
mul.f32 f229, f226, f167;
mul.f32 f230, f228, f168;
sub.f32 f231, f229, f230;
mul.f32 f232, f226, f168;
fma.rn.f32 f233, f228, f167, f232;
mul.f32 f234, f195, f226;
mul.f32 f235, f196, f228;
sub.f32 f236, f234, f235;
mul.f32 f237, f195, f228;
fma.rn.f32 f238, f196, f226, f237;
mul.f32 f239, f236, f191;
mul.f32 f240, f238, f192;
sub.f32 f241, f239, f240;
mul.f32 f242, f236, f192;
fma.rn.f32 f243, f238, f191, f242;
mul.f32 f244, f195, f236;
mul.f32 f245, f196, f238;
sub.f32 f246, f244, f245;
mul.f32 f247, f195, f238;
fma.rn.f32 f248, f196, f236, f247;
mul.f32 f249, f246, f193;
mul.f32 f250, f248, f194;
sub.f32 f251, f249, f250;
mul.f32 f252, f246, f194;
fma.rn.f32 f253, f248, f193, f252;
mul.f32 f254, f195, f246;
mul.f32 f255, f196, f248;
sub.f32 f256, f254, f255;
mul.f32 f257, f195, f248;
fma.rn.f32 f258, f196, f246, f257;
mul.f32 f259, f256, f169;
mul.f32 f260, f258, f170;
sub.f32 f261, f259, f260;
mul.f32 f262, f256, f170;
fma.rn.f32 f263, f258, f169, f262;
mul.f32 f264, f195, f256;
mul.f32 f265, f196, f258;
sub.f32 f266, f264, f265;
mul.f32 f267, f195, f258;
fma.rn.f32 f268, f196, f256, f267;
mul.f32 f269, f266, f145;
mul.f32 f270, f268, f146;
sub.f32 f271, f269, f270;
mul.f32 f272, f266, f146;
fma.rn.f32 f273, f268, f145, f272;
mul.f32 f274, f195, f266;
mul.f32 f275, f196, f268;
sub.f32 f276, f274, f275;
mul.f32 f277, f195, f268;
fma.rn.f32 f278, f196, f266, f277;
mul.f32 f279, f276, f121;
mul.f32 f280, f278, f122;
sub.f32 f281, f279, f280;
mul.f32 f282, f276, f122;
fma.rn.f32 f283, f278, f121, f282;
mul.f32 f284, f195, f276;
mul.f32 f285, f196, f278;
sub.f32 f286, f284, f285;
mul.f32 f287, f195, f278;
fma.rn.f32 f288, f196, f276, f287;
mul.f32 f289, f286, f97;
mul.f32 f290, f288, f98;
sub.f32 f291, f289, f290;
mul.f32 f292, f286, f98;
fma.rn.f32 f293, f288, f97, f292;
mad.lo.s32 r12, r9, 5324, r3;
barrier.sync 0;
mad.lo.s32 r13, r11, 44, r12;
st.shared.f32 [r13], f73;
st.shared.f32 [r13+4], f201;
st.shared.f32 [r13+8], f211;
st.shared.f32 [r13+12], f221;
st.shared.f32 [r13+16], f231;
st.shared.f32 [r13+20], f241;
st.shared.f32 [r13+24], f251;
st.shared.f32 [r13+28], f261;
st.shared.f32 [r13+32], f271;
st.shared.f32 [r13+36], f281;
st.shared.f32 [r13+40], f291;
barrier.sync 0;
mad.lo.s32 r14, r11, -40, r13;
ld.shared.f32 f294, [r14];
ld.shared.f32 f295, [r14+484];
ld.shared.f32 f296, [r14+968];
ld.shared.f32 f297, [r14+1452];
ld.shared.f32 f298, [r14+1936];
ld.shared.f32 f299, [r14+2420];
ld.shared.f32 f300, [r14+2904];
ld.shared.f32 f301, [r14+3388];
ld.shared.f32 f302, [r14+3872];
ld.shared.f32 f303, [r14+4356];
ld.shared.f32 f304, [r14+4840];
barrier.sync 0;
st.shared.f32 [r13], f74;
st.shared.f32 [r13+4], f203;
st.shared.f32 [r13+8], f213;
st.shared.f32 [r13+12], f223;
st.shared.f32 [r13+16], f233;
st.shared.f32 [r13+20], f243;
st.shared.f32 [r13+24], f253;
st.shared.f32 [r13+28], f263;
st.shared.f32 [r13+32], f273;
st.shared.f32 [r13+36], f283;
st.shared.f32 [r13+40], f293;
barrier.sync 0;
ld.shared.f32 f305, [r14];
ld.shared.f32 f306, [r14+484];
ld.shared.f32 f307, [r14+968];
ld.shared.f32 f308, [r14+1452];
ld.shared.f32 f309, [r14+1936];
ld.shared.f32 f310, [r14+2420];
ld.shared.f32 f311, [r14+2904];
ld.shared.f32 f312, [r14+3388];
ld.shared.f32 f313, [r14+3872];
ld.shared.f32 f314, [r14+4356];
ld.shared.f32 f315, [r14+4840];
add.f32 f316, f295, f304;
add.f32 f317, f306, f315;
sub.f32 f318, f295, f304;
sub.f32 f319, f306, f315;
add.f32 f320, f296, f303;
add.f32 f321, f307, f314;
sub.f32 f322, f296, f303;
sub.f32 f323, f307, f314;
add.f32 f324, f297, f302;
add.f32 f325, f308, f313;
sub.f32 f326, f297, f302;
sub.f32 f327, f308, f313;
add.f32 f328, f298, f301;
add.f32 f329, f309, f312;
sub.f32 f330, f298, f301;
sub.f32 f331, f309, f312;
add.f32 f332, f299, f300;
add.f32 f333, f310, f311;
sub.f32 f334, f299, f300;
sub.f32 f335, f310, f311;
add.f32 f336, f294, f316;
add.f32 f337, f305, f317;
add.f32 f338, f336, f320;
add.f32 f339, f337, f321;
add.f32 f340, f338, f324;
add.f32 f341, f339, f325;
add.f32 f342, f340, f328;
add.f32 f343, f341, f329;
add.f32 f344, f342, f332;
add.f32 f345, f343, f333;
fma.rn.f32 f346, f316, 0f3F575C64, f294;
fma.rn.f32 f347, f319, 0fBF0A6770, 0f00000000;
fma.rn.f32 f348, f317, 0f3F575C64, f305;
fma.rn.f32 f349, f318, 0fBF0A6770, 0f00000000;
fma.rn.f32 f350, f320, 0f3ED4B147, f346;
fma.rn.f32 f351, f323, 0fBF68DDA4, f347;
fma.rn.f32 f352, f321, 0f3ED4B147, f348;
fma.rn.f32 f353, f322, 0fBF68DDA4, f349;
fma.rn.f32 f354, f324, 0fBE11BAFB, f350;
fma.rn.f32 f355, f327, 0fBF7D64F0, f351;
fma.rn.f32 f356, f325, 0fBE11BAFB, f352;
fma.rn.f32 f357, f326, 0fBF7D64F0, f353;
fma.rn.f32 f358, f328, 0fBF27A4F4, f354;
fma.rn.f32 f359, f331, 0fBF4178CE, f355;
fma.rn.f32 f360, f329, 0fBF27A4F4, f356;
fma.rn.f32 f361, f330, 0fBF4178CE, f357;
fma.rn.f32 f362, f332, 0fBF75A155, f358;
fma.rn.f32 f363, f335, 0fBE903F40, f359;
fma.rn.f32 f364, f333, 0fBF75A155, f360;
fma.rn.f32 f365, f334, 0fBE903F40, f361;
sub.f32 f366, f362, f363;
add.f32 f367, f365, f364;
add.f32 f368, f363, f362;
sub.f32 f369, f364, f365;
fma.rn.f32 f370, f316, 0f3ED4B147, f294;
fma.rn.f32 f371, f319, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f372, f317, 0f3ED4B147, f305;
fma.rn.f32 f373, f318, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f374, f320, 0fBF27A4F4, f370;
fma.rn.f32 f375, f323, 0fBF4178CE, f371;
fma.rn.f32 f376, f321, 0fBF27A4F4, f372;
fma.rn.f32 f377, f322, 0fBF4178CE, f373;
fma.rn.f32 f378, f324, 0fBF75A155, f374;
fma.rn.f32 f379, f327, 0f3E903F40, f375;
fma.rn.f32 f380, f325, 0fBF75A155, f376;
fma.rn.f32 f381, f326, 0f3E903F40, f377;
fma.rn.f32 f382, f328, 0fBE11BAFB, f378;
fma.rn.f32 f383, f331, 0f3F7D64F0, f379;
fma.rn.f32 f384, f329, 0fBE11BAFB, f380;
fma.rn.f32 f385, f330, 0f3F7D64F0, f381;
fma.rn.f32 f386, f332, 0f3F575C64, f382;
fma.rn.f32 f387, f335, 0f3F0A6770, f383;
fma.rn.f32 f388, f333, 0f3F575C64, f384;
fma.rn.f32 f389, f334, 0f3F0A6770, f385;
sub.f32 f390, f386, f387;
add.f32 f391, f389, f388;
add.f32 f392, f387, f386;
sub.f32 f393, f388, f389;
fma.rn.f32 f394, f316, 0fBE11BAFB, f294;
fma.rn.f32 f395, f319, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f396, f317, 0fBE11BAFB, f305;
fma.rn.f32 f397, f318, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f398, f320, 0fBF75A155, f394;
fma.rn.f32 f399, f323, 0f3E903F40, f395;
fma.rn.f32 f400, f321, 0fBF75A155, f396;
fma.rn.f32 f401, f322, 0f3E903F40, f397;
fma.rn.f32 f402, f324, 0f3ED4B147, f398;
fma.rn.f32 f403, f327, 0f3F68DDA4, f399;
fma.rn.f32 f404, f325, 0f3ED4B147, f400;
fma.rn.f32 f405, f326, 0f3F68DDA4, f401;
fma.rn.f32 f406, f328, 0f3F575C64, f402;
fma.rn.f32 f407, f331, 0fBF0A6770, f403;
fma.rn.f32 f408, f329, 0f3F575C64, f404;
fma.rn.f32 f409, f330, 0fBF0A6770, f405;
fma.rn.f32 f410, f332, 0fBF27A4F4, f406;
fma.rn.f32 f411, f335, 0fBF4178CE, f407;
fma.rn.f32 f412, f333, 0fBF27A4F4, f408;
fma.rn.f32 f413, f334, 0fBF4178CE, f409;
sub.f32 f414, f410, f411;
add.f32 f415, f413, f412;
add.f32 f416, f411, f410;
sub.f32 f417, f412, f413;
fma.rn.f32 f418, f316, 0fBF27A4F4, f294;
fma.rn.f32 f419, f319, 0fBF4178CE, 0f00000000;
fma.rn.f32 f420, f317, 0fBF27A4F4, f305;
fma.rn.f32 f421, f318, 0fBF4178CE, 0f00000000;
fma.rn.f32 f422, f320, 0fBE11BAFB, f418;
fma.rn.f32 f423, f323, 0f3F7D64F0, f419;
fma.rn.f32 f424, f321, 0fBE11BAFB, f420;
fma.rn.f32 f425, f322, 0f3F7D64F0, f421;
fma.rn.f32 f426, f324, 0f3F575C64, f422;
fma.rn.f32 f427, f327, 0fBF0A6770, f423;
fma.rn.f32 f428, f325, 0f3F575C64, f424;
fma.rn.f32 f429, f326, 0fBF0A6770, f425;
fma.rn.f32 f430, f328, 0fBF75A155, f426;
fma.rn.f32 f431, f331, 0fBE903F40, f427;
fma.rn.f32 f432, f329, 0fBF75A155, f428;
fma.rn.f32 f433, f330, 0fBE903F40, f429;
fma.rn.f32 f434, f332, 0f3ED4B147, f430;
fma.rn.f32 f435, f335, 0f3F68DDA4, f431;
fma.rn.f32 f436, f333, 0f3ED4B147, f432;
fma.rn.f32 f437, f334, 0f3F68DDA4, f433;
sub.f32 f438, f434, f435;
add.f32 f439, f437, f436;
add.f32 f440, f435, f434;
sub.f32 f441, f436, f437;
fma.rn.f32 f442, f316, 0fBF75A155, f294;
fma.rn.f32 f443, f319, 0fBE903F40, 0f00000000;
fma.rn.f32 f444, f317, 0fBF75A155, f305;
fma.rn.f32 f445, f318, 0fBE903F40, 0f00000000;
fma.rn.f32 f446, f320, 0f3F575C64, f442;
fma.rn.f32 f447, f323, 0f3F0A6770, f443;
fma.rn.f32 f448, f321, 0f3F575C64, f444;
fma.rn.f32 f449, f322, 0f3F0A6770, f445;
fma.rn.f32 f450, f324, 0fBF27A4F4, f446;
fma.rn.f32 f451, f327, 0fBF4178CE, f447;
fma.rn.f32 f452, f325, 0fBF27A4F4, f448;
fma.rn.f32 f453, f326, 0fBF4178CE, f449;
fma.rn.f32 f454, f328, 0f3ED4B147, f450;
fma.rn.f32 f455, f331, 0f3F68DDA4, f451;
fma.rn.f32 f456, f329, 0f3ED4B147, f452;
fma.rn.f32 f457, f330, 0f3F68DDA4, f453;
fma.rn.f32 f458, f332, 0fBE11BAFB, f454;
fma.rn.f32 f459, f335, 0fBF7D64F0, f455;
fma.rn.f32 f460, f333, 0fBE11BAFB, f456;
fma.rn.f32 f461, f334, 0fBF7D64F0, f457;
sub.f32 f462, f458, f459;
add.f32 f463, f461, f460;
add.f32 f464, f459, f458;
sub.f32 f465, f460, f461;
mul.wide.u32 rd7, r11, -1171354717;
shr.u64 rd8, rd7, 35;
cvt.u32.u64 r15, rd8;
mul.lo.s32 r16, r15, 11;
sub.s32 r17, r11, r16;
mul.wide.u32 rd9, r15, 8;
mov.u64 rd10, %24;
add.s64 rd11, rd10, rd9;
ld.global.v2.f32 {f466, f467}, [rd11];
mul.f32 f470, f466, f366;
mul.f32 f471, f467, f367;
sub.f32 f472, f470, f471;
mul.f32 f473, f466, f367;
fma.rn.f32 f474, f467, f366, f473;
mul.f32 f475, f466, f466;
mul.f32 f476, f467, f467;
sub.f32 f477, f475, f476;
mul.f32 f478, f467, f466;
fma.rn.f32 f479, f467, f466, f478;
mul.f32 f480, f477, f390;
mul.f32 f481, f479, f391;
sub.f32 f482, f480, f481;
mul.f32 f483, f477, f391;
fma.rn.f32 f484, f479, f390, f483;
mul.f32 f485, f466, f477;
mul.f32 f486, f467, f479;
sub.f32 f487, f485, f486;
mul.f32 f488, f466, f479;
fma.rn.f32 f489, f467, f477, f488;
mul.f32 f490, f487, f414;
mul.f32 f491, f489, f415;
sub.f32 f492, f490, f491;
mul.f32 f493, f487, f415;
fma.rn.f32 f494, f489, f414, f493;
mul.f32 f495, f466, f487;
mul.f32 f496, f467, f489;
sub.f32 f497, f495, f496;
mul.f32 f498, f466, f489;
fma.rn.f32 f499, f467, f487, f498;
mul.f32 f500, f497, f438;
mul.f32 f501, f499, f439;
sub.f32 f502, f500, f501;
mul.f32 f503, f497, f439;
fma.rn.f32 f504, f499, f438, f503;
mul.f32 f505, f466, f497;
mul.f32 f506, f467, f499;
sub.f32 f507, f505, f506;
mul.f32 f508, f466, f499;
fma.rn.f32 f509, f467, f497, f508;
mul.f32 f510, f507, f462;
mul.f32 f511, f509, f463;
sub.f32 f512, f510, f511;
mul.f32 f513, f507, f463;
fma.rn.f32 f514, f509, f462, f513;
mul.f32 f515, f466, f507;
mul.f32 f516, f467, f509;
sub.f32 f517, f515, f516;
mul.f32 f518, f466, f509;
fma.rn.f32 f519, f467, f507, f518;
mul.f32 f520, f517, f464;
mul.f32 f521, f519, f465;
sub.f32 f522, f520, f521;
mul.f32 f523, f517, f465;
fma.rn.f32 f524, f519, f464, f523;
mul.f32 f525, f466, f517;
mul.f32 f526, f467, f519;
sub.f32 f527, f525, f526;
mul.f32 f528, f466, f519;
fma.rn.f32 f529, f467, f517, f528;
mul.f32 f530, f527, f440;
mul.f32 f531, f529, f441;
sub.f32 f532, f530, f531;
mul.f32 f533, f527, f441;
fma.rn.f32 f534, f529, f440, f533;
mul.f32 f535, f466, f527;
mul.f32 f536, f467, f529;
sub.f32 f537, f535, f536;
mul.f32 f538, f466, f529;
fma.rn.f32 f539, f467, f527, f538;
mul.f32 f540, f537, f416;
mul.f32 f541, f539, f417;
sub.f32 f542, f540, f541;
mul.f32 f543, f537, f417;
fma.rn.f32 f544, f539, f416, f543;
mul.f32 f545, f466, f537;
mul.f32 f546, f467, f539;
sub.f32 f547, f545, f546;
mul.f32 f548, f466, f539;
fma.rn.f32 f549, f467, f537, f548;
mul.f32 f550, f547, f392;
mul.f32 f551, f549, f393;
sub.f32 f552, f550, f551;
mul.f32 f553, f547, f393;
fma.rn.f32 f554, f549, f392, f553;
mul.f32 f555, f466, f547;
mul.f32 f556, f467, f549;
sub.f32 f557, f555, f556;
mul.f32 f558, f466, f549;
fma.rn.f32 f559, f467, f547, f558;
mul.f32 f560, f557, f368;
mul.f32 f561, f559, f369;
sub.f32 f562, f560, f561;
mul.f32 f563, f557, f369;
fma.rn.f32 f564, f559, f368, f563;
shl.b32 r18, r17, 2;
add.s32 r19, r12, r18;
barrier.sync 0;
mad.lo.s32 r20, r15, 484, r19;
st.shared.f32 [r20], f344;
st.shared.f32 [r20+44], f472;
st.shared.f32 [r20+88], f482;
st.shared.f32 [r20+132], f492;
st.shared.f32 [r20+176], f502;
st.shared.f32 [r20+220], f512;
st.shared.f32 [r20+264], f522;
st.shared.f32 [r20+308], f532;
st.shared.f32 [r20+352], f542;
st.shared.f32 [r20+396], f552;
st.shared.f32 [r20+440], f562;
barrier.sync 0;
ld.shared.f32 f565, [r14];
ld.shared.f32 f566, [r14+484];
ld.shared.f32 f567, [r14+968];
ld.shared.f32 f568, [r14+1452];
ld.shared.f32 f569, [r14+1936];
ld.shared.f32 f570, [r14+2420];
ld.shared.f32 f571, [r14+2904];
ld.shared.f32 f572, [r14+3388];
ld.shared.f32 f573, [r14+3872];
ld.shared.f32 f574, [r14+4356];
ld.shared.f32 f575, [r14+4840];
barrier.sync 0;
st.shared.f32 [r20], f345;
st.shared.f32 [r20+44], f474;
st.shared.f32 [r20+88], f484;
st.shared.f32 [r20+132], f494;
st.shared.f32 [r20+176], f504;
st.shared.f32 [r20+220], f514;
st.shared.f32 [r20+264], f524;
st.shared.f32 [r20+308], f534;
st.shared.f32 [r20+352], f544;
st.shared.f32 [r20+396], f554;
st.shared.f32 [r20+440], f564;
barrier.sync 0;
ld.shared.f32 f576, [r14];
ld.shared.f32 f577, [r14+484];
ld.shared.f32 f578, [r14+968];
ld.shared.f32 f579, [r14+1452];
ld.shared.f32 f580, [r14+1936];
ld.shared.f32 f581, [r14+2420];
ld.shared.f32 f582, [r14+2904];
ld.shared.f32 f583, [r14+3388];
ld.shared.f32 f584, [r14+3872];
ld.shared.f32 f585, [r14+4356];
ld.shared.f32 f586, [r14+4840];
add.f32 f587, f566, f575;
add.f32 f588, f577, f586;
sub.f32 f589, f566, f575;
sub.f32 f590, f577, f586;
add.f32 f591, f567, f574;
add.f32 f592, f578, f585;
sub.f32 f593, f567, f574;
sub.f32 f594, f578, f585;
add.f32 f595, f568, f573;
add.f32 f596, f579, f584;
sub.f32 f597, f568, f573;
sub.f32 f598, f579, f584;
add.f32 f599, f569, f572;
add.f32 f600, f580, f583;
sub.f32 f601, f569, f572;
sub.f32 f602, f580, f583;
add.f32 f603, f570, f571;
add.f32 f604, f581, f582;
sub.f32 f605, f570, f571;
sub.f32 f606, f581, f582;
add.f32 f607, f565, f587;
add.f32 f608, f576, f588;
add.f32 f609, f607, f591;
add.f32 f610, f608, f592;
add.f32 f611, f609, f595;
add.f32 f612, f610, f596;
add.f32 f613, f611, f599;
add.f32 f614, f612, f600;
fma.rn.f32 f615, f587, 0f3F575C64, f565;
fma.rn.f32 f616, f590, 0fBF0A6770, 0f00000000;
fma.rn.f32 f617, f588, 0f3F575C64, f576;
fma.rn.f32 f618, f589, 0fBF0A6770, 0f00000000;
fma.rn.f32 f619, f591, 0f3ED4B147, f615;
fma.rn.f32 f620, f594, 0fBF68DDA4, f616;
fma.rn.f32 f621, f592, 0f3ED4B147, f617;
fma.rn.f32 f622, f593, 0fBF68DDA4, f618;
fma.rn.f32 f623, f595, 0fBE11BAFB, f619;
fma.rn.f32 f624, f598, 0fBF7D64F0, f620;
fma.rn.f32 f625, f596, 0fBE11BAFB, f621;
fma.rn.f32 f626, f597, 0fBF7D64F0, f622;
fma.rn.f32 f627, f599, 0fBF27A4F4, f623;
fma.rn.f32 f628, f602, 0fBF4178CE, f624;
fma.rn.f32 f629, f600, 0fBF27A4F4, f625;
fma.rn.f32 f630, f601, 0fBF4178CE, f626;
fma.rn.f32 f631, f603, 0fBF75A155, f627;
fma.rn.f32 f632, f606, 0fBE903F40, f628;
fma.rn.f32 f633, f604, 0fBF75A155, f629;
fma.rn.f32 f634, f605, 0fBE903F40, f630;
fma.rn.f32 f635, f587, 0f3ED4B147, f565;
fma.rn.f32 f636, f590, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f637, f588, 0f3ED4B147, f576;
fma.rn.f32 f638, f589, 0fBF68DDA4, 0f00000000;
fma.rn.f32 f639, f591, 0fBF27A4F4, f635;
fma.rn.f32 f640, f594, 0fBF4178CE, f636;
fma.rn.f32 f641, f592, 0fBF27A4F4, f637;
fma.rn.f32 f642, f593, 0fBF4178CE, f638;
fma.rn.f32 f643, f595, 0fBF75A155, f639;
fma.rn.f32 f644, f598, 0f3E903F40, f640;
fma.rn.f32 f645, f596, 0fBF75A155, f641;
fma.rn.f32 f646, f597, 0f3E903F40, f642;
fma.rn.f32 f647, f599, 0fBE11BAFB, f643;
fma.rn.f32 f648, f602, 0f3F7D64F0, f644;
fma.rn.f32 f649, f600, 0fBE11BAFB, f645;
fma.rn.f32 f650, f601, 0f3F7D64F0, f646;
fma.rn.f32 f651, f603, 0f3F575C64, f647;
fma.rn.f32 f652, f606, 0f3F0A6770, f648;
fma.rn.f32 f653, f604, 0f3F575C64, f649;
fma.rn.f32 f654, f605, 0f3F0A6770, f650;
fma.rn.f32 f655, f587, 0fBE11BAFB, f565;
fma.rn.f32 f656, f590, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f657, f588, 0fBE11BAFB, f576;
fma.rn.f32 f658, f589, 0fBF7D64F0, 0f00000000;
fma.rn.f32 f659, f591, 0fBF75A155, f655;
fma.rn.f32 f660, f594, 0f3E903F40, f656;
fma.rn.f32 f661, f592, 0fBF75A155, f657;
fma.rn.f32 f662, f593, 0f3E903F40, f658;
fma.rn.f32 f663, f595, 0f3ED4B147, f659;
fma.rn.f32 f664, f598, 0f3F68DDA4, f660;
fma.rn.f32 f665, f596, 0f3ED4B147, f661;
fma.rn.f32 f666, f597, 0f3F68DDA4, f662;
fma.rn.f32 f667, f599, 0f3F575C64, f663;
fma.rn.f32 f668, f602, 0fBF0A6770, f664;
fma.rn.f32 f669, f600, 0f3F575C64, f665;
fma.rn.f32 f670, f601, 0fBF0A6770, f666;
fma.rn.f32 f671, f603, 0fBF27A4F4, f667;
fma.rn.f32 f672, f606, 0fBF4178CE, f668;
fma.rn.f32 f673, f604, 0fBF27A4F4, f669;
fma.rn.f32 f674, f605, 0fBF4178CE, f670;
fma.rn.f32 f675, f587, 0fBF27A4F4, f565;
fma.rn.f32 f676, f590, 0fBF4178CE, 0f00000000;
fma.rn.f32 f677, f588, 0fBF27A4F4, f576;
fma.rn.f32 f678, f589, 0fBF4178CE, 0f00000000;
fma.rn.f32 f679, f591, 0fBE11BAFB, f675;
fma.rn.f32 f680, f594, 0f3F7D64F0, f676;
fma.rn.f32 f681, f592, 0fBE11BAFB, f677;
fma.rn.f32 f682, f593, 0f3F7D64F0, f678;
fma.rn.f32 f683, f595, 0f3F575C64, f679;
fma.rn.f32 f684, f598, 0fBF0A6770, f680;
fma.rn.f32 f685, f596, 0f3F575C64, f681;
fma.rn.f32 f686, f597, 0fBF0A6770, f682;
fma.rn.f32 f687, f599, 0fBF75A155, f683;
fma.rn.f32 f688, f602, 0fBE903F40, f684;
fma.rn.f32 f689, f600, 0fBF75A155, f685;
fma.rn.f32 f690, f601, 0fBE903F40, f686;
fma.rn.f32 f691, f603, 0f3ED4B147, f687;
fma.rn.f32 f692, f606, 0f3F68DDA4, f688;
fma.rn.f32 f693, f604, 0f3ED4B147, f689;
fma.rn.f32 f694, f605, 0f3F68DDA4, f690;
fma.rn.f32 f695, f587, 0fBF75A155, f565;
fma.rn.f32 f696, f590, 0fBE903F40, 0f00000000;
fma.rn.f32 f697, f588, 0fBF75A155, f576;
fma.rn.f32 f698, f589, 0fBE903F40, 0f00000000;
fma.rn.f32 f699, f591, 0f3F575C64, f695;
fma.rn.f32 f700, f594, 0f3F0A6770, f696;
fma.rn.f32 f701, f592, 0f3F575C64, f697;
fma.rn.f32 f702, f593, 0f3F0A6770, f698;
fma.rn.f32 f703, f595, 0fBF27A4F4, f699;
fma.rn.f32 f704, f598, 0fBF4178CE, f700;
fma.rn.f32 f705, f596, 0fBF27A4F4, f701;
fma.rn.f32 f706, f597, 0fBF4178CE, f702;
fma.rn.f32 f707, f599, 0f3ED4B147, f703;
fma.rn.f32 f708, f602, 0f3F68DDA4, f704;
fma.rn.f32 f709, f600, 0f3ED4B147, f705;
fma.rn.f32 f710, f601, 0f3F68DDA4, f706;
fma.rn.f32 f711, f603, 0fBE11BAFB, f707;
fma.rn.f32 f712, f606, 0fBF7D64F0, f708;
fma.rn.f32 f713, f604, 0fBE11BAFB, f709;
fma.rn.f32 f714, f605, 0fBF7D64F0, f710;
add.f32 %0, f613, f603;
add.f32 %1, f614, f604;
add.f32 %3, f634, f633;
sub.f32 %2, f631, f632;
add.f32 %5, f654, f653;
sub.f32 %4, f651, f652;
add.f32 %7, f674, f673;
sub.f32 %6, f671, f672;
add.f32 %9, f694, f693;
sub.f32 %8, f691, f692;
add.f32 %11, f714, f713;
sub.f32 %10, f711, f712;
sub.f32 %13, f713, f714;
add.f32 %12, f712, f711;
sub.f32 %15, f693, f694;
add.f32 %14, f692, f691;
sub.f32 %17, f673, f674;
add.f32 %16, f672, f671;
sub.f32 %19, f653, f654;
add.f32 %18, f652, f651;
sub.f32 %21, f633, f634;
add.f32 %20, f632, f631;
})"
     : "=f"(rmem[0].x), "=f"(rmem[0].y), "=f"(rmem[1].x), "=f"(rmem[1].y), "=f"(rmem[2].x), "=f"(rmem[2].y), "=f"(rmem[3].x), "=f"(rmem[3].y), "=f"(rmem[4].x), "=f"(rmem[4].y), "=f"(rmem[5].x), "=f"(rmem[5].y), "=f"(rmem[6].x), "=f"(rmem[6].y), "=f"(rmem[7].x), "=f"(rmem[7].y), "=f"(rmem[8].x), "=f"(rmem[8].y), "=f"(rmem[9].x), "=f"(rmem[9].y), "=f"(rmem[10].x), "=f"(rmem[10].y): "r"(smem), "l"(lut_sp_11_1331), "l"(lut_sp_11_121), "f"(rmem[0].x), "f"(rmem[0].y), "f"(rmem[1].x), "f"(rmem[1].y), "f"(rmem[1].y), "f"(rmem[2].x), "f"(rmem[2].y), "f"(rmem[2].y), "f"(rmem[3].x), "f"(rmem[3].y), "f"(rmem[4].x), "f"(rmem[4].y), "f"(rmem[4].y), "f"(rmem[5].x), "f"(rmem[5].y), "f"(rmem[5].y), "f"(rmem[6].x), "f"(rmem[6].y), "f"(rmem[7].x), "f"(rmem[7].y), "f"(rmem[7].y), "f"(rmem[8].x), "f"(rmem[8].y), "f"(rmem[8].y), "f"(rmem[9].x), "f"(rmem[9].y), "f"(rmem[10].x), "f"(rmem[10].y));
};


#endif
